#install.packages("lubridate")
#install.packages("ggplot2")
#install.packages("forecast")
#install.packages("here")
#install.packages("knitr")
#install.packages("kableExtra")
#install.packages("dplyr")
#install.packages("openxlsx")
#install.packages("ggthemes")
#install.packages("tidyr")
#install.packages("GGally")
#install.packages("tseries")
#install.packages("blorr")
#install.packages("car")
#install.packages("corrplot")
#install.packages("dlm")
#install.packages("randomForest")
#install.packages("Kendall")
library(lubridate)
library(ggplot2)
library(forecast)
library(here)
library(knitr)
library(kableExtra)
library(dplyr)
library(openxlsx)
library(ggthemes)
library(tidyr)
library(Kendall)
library(GGally)
library(trend)
library(tseries)
library(blorr)
library(lmtest)
library(car)
library(cowplot)
library(corrplot)
here()
raw_water<-read.csv(here('Data/Processed/groundwater_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)
raw_preci<-read.csv(here('Data/Processed/preci_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)
raw_temp<-read.csv(here('Data/Processed/temp_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)
raw_water<-raw_water[,2:10]
raw_preci<-raw_preci[,2:3]
raw_temp<-raw_temp[,2:3]
#date format
raw_water$Date<-ymd(raw_water$Date)
raw_temp$Date<-ymd(raw_temp$Date)
raw_preci$Date<-ymd(raw_preci$Dat)
#daily average
df_water<-raw_water %>%
group_by(Date) %>%
summarize(water=mean(Groundwater_level, na.rm=TRUE))
#check timeframe
print(head(df_water$Date,1)) #1993-02-01
## [1] "1990-01-16"
print(tail(df_water$Date,1)) #2022-01-26
## [1] "2022-01-26"
print(head(raw_temp$Date,1)) #1990-01-01
## [1] "1990-01-01"
print(tail(raw_temp$Date,1)) #2024-03-01
## [1] "2024-03-01"
#monthly average
df_water_monthly<-df_water %>%
group_by(month=format(Date, "%Y-%m")) %>%
summarize(monthly_avg = mean(water, na.rm = TRUE))
#merge precipitation dataframe and temperature dataframe
raw_temp_preci<-left_join(raw_temp,raw_preci,by="Date")
#missing data
#create time dataframe
start_time<-ymd("1993-02-01")
end_time<-ymd("2022-01-26")
df_time_daily<-data.frame(Date=seq(from=start_time, to=end_time, by="1 day"))
df_time_monthly<-data.frame(Date=seq(from=start_time, to=end_time, by="1 month"))
#merge dataframe
df_water_merge<-left_join(df_time_daily,df_water,by="Date")
df_temp_preci_merge<-left_join(df_time_monthly,raw_temp_preci,by="Date")
head(df_water_merge)
## Date water
## 1 1993-02-01 NA
## 2 1993-02-02 NA
## 3 1993-02-03 NA
## 4 1993-02-04 NA
## 5 1993-02-05 NA
## 6 1993-02-06 NA
head(df_temp_preci_merge)
## Date Temperature Precipitation
## 1 1993-02-01 52.4 2.46
## 2 1993-03-01 59.0 1.47
## 3 1993-04-01 66.6 0.00
## 4 1993-05-01 76.7 0.48
## 5 1993-06-01 83.2 0.01
## 6 1993-07-01 86.3 0.57
#filter missing data
df_water_missing<-df_water_merge %>%
filter(is.na(water)) #continuous data since 2001-03-01
df_temp_preci_merge %>%
filter(is.na(Temperature)) #no missing value
## [1] Date Temperature Precipitation
## <0 rows> (or 0-length row.names)
#drop the missing value
df_water<-df_water_merge %>%
filter(Date>=ymd("2001-03-01") & Date<=ymd("2021-10-22"))
df_temp_preci<-df_temp_preci_merge %>%
filter(Date>=ymd("2001-03-01")& Date<=ymd("2021-10-22"))
df_temp_preci2<-df_temp_preci_merge %>%
filter(Date>=ymd("2002-02-01")& Date<=ymd("2021-10-22"))
#water is daily data
#temp and preci are monthly data
#plotting original data
plot(df_water[,2], type = "l")

plot
## function (x, y, ...)
## UseMethod("plot")
## <bytecode: 0x7ff63346b0e8>
## <environment: namespace:base>
#monthly average
df_water_monthly<-df_water %>%
group_by(month=format(Date, "%Y-%m")) %>%
summarize(monthly_avg = mean(water, na.rm = TRUE))
df_water_monthly$month <- ym(df_water_monthly$month)
df_water_monthly <- df_water_monthly %>%
filter(month>=as.Date("2002-02-01"))
#water
#msts_water<-msts(df_water[,2],seasonal.periods=c(7,365),start =c(2001,03), end=c(2021,10))
ts_water_monthly<-ts(df_water_monthly[,2], frequency=12,start =c(2002,02), end=c(2021,10))
#autoplot(msts_water)
autoplot(ts_water_monthly)

#question
#ts_water_monthly<-ts(df_water_monthly[,2], frequency=12,start =c(2010,01), end=c(2021,10))
#water
par(mfrow=c(1,2))
Acf(ts_water_monthly,lag.max=40,main=paste("ACF for Groundwater"),ylim=c(-0.5,1))
Pacf(ts_water_monthly,lag.max=40,main=paste("PACF for Groundwater"),ylim=c(-0.5,1))

plot_grid(
autoplot(ts_water_monthly),
autoplot(Acf(ts_water_monthly,plot=FALSE,lag.max=40)),
autoplot(Pacf(ts_water_monthly,plot=FALSE,lag.max=40)),
nrow=1
)

#decompose
water_decompose<-decompose(ts_water_monthly)
plot(water_decompose)

#deseason
water_deseason<-seasadj(water_decompose)
plot_grid(
autoplot(water_deseason),
autoplot(Acf(water_deseason,plot=FALSE,lag.max=40)),
autoplot(Pacf(water_deseason,plot=FALSE,lag.max=40)),
nrow=1
)

par(mfrow=c(1,2))
Acf(water_deseason,lag.max=40,main=paste("ACF for Groundwater"),ylim=c(-0.5,1))
Pacf(water_deseason,lag.max=40,main=paste("PACF for Groundwater"),ylim=c(-0.5,1))

#ts; monthly
#Mann-Kendall
summary(MannKendall(water_deseason)) #reject the null hypothesis, supporting the presence of a trend
## Score = 808 , Var(Score) = 1488413
## denominator = 27966
## tau = 0.0289, 2-sided pvalue =0.50831
#seasonal Mann-Kendall
#correction: calculate monthly average
trend::smk.test(water_deseason)
##
## Seasonal Mann-Kendall trend test (Hirsch-Slack test)
##
## data: water_deseason
## z = 0.53391, p-value = 0.5934
## alternative hypothesis: true S is not equal to 0
## sample estimates:
## S varS
## 57 11001
#ADF test
print(adf.test(water_deseason,alternative = "stationary")) #reject the null hypothesis, the trend is stationary
##
## Augmented Dickey-Fuller Test
##
## data: water_deseason
## Dickey-Fuller = -3.0006, Lag order = 6, p-value = 0.1552
## alternative hypothesis: stationary
#not sure why we are running it on deseasoned data here since this test can handle seasonality
print(adf.test(ts_water_monthly,alternative = "stationary"))
##
## Augmented Dickey-Fuller Test
##
## data: ts_water_monthly
## Dickey-Fuller = -3.0442, Lag order = 6, p-value = 0.1368
## alternative hypothesis: stationary
#p-value>0.05 so null hypothesis cant be rejected and trend is stochastic
SMKtest<- SeasonalMannKendall(ts_water_monthly)
print(summary(SMKtest))
## Score = 57 , Var(Score) = 11001
## denominator = 2223
## tau = 0.0256, 2-sided pvalue =0.58682
## NULL
#ts; monthly
#find out how many times is needed for differencing
#ns_diff<-nsdiffs(water_deseason)
#cat("Number of seasonal differencing needed:", ns_diff) #no need to difference the series
#question: does not need to difference the series but has trend in the data
#this is no need for seasonal differencing
n_diff<- ndiffs(ts_water_monthly)
cat("Number of trend differencing needed:", n_diff)
## Number of trend differencing needed: 1
#differencing is still needed for the stationary trend, d=1
#water
#from 2001-03-01 to 2021-10-22
#monthly ts
#ts_training<-ts(df_water_monthly[,2],
#frequency=12,
#start=c(2002,02), end=c(2020,09))
#I'm not sure why the start=c(2010,01) says 2010. Shouldn't it be 2001?
#ts_testing<-ts(df_water_monthly[,2],
#frequency=12,
#start=c(2020,10), end=c(2021,10))
nobs <- nrow(df_water_monthly)
nfor <- 12
ts_training2 <- subset(ts_water_monthly, end=length(ts_water_monthly)-nfor)
ts_testing2 <- subset(ts_water_monthly, start = length(ts_water_monthly)-nfor+1)
autoplot(ts_training2)

autoplot(ts_testing2)

#ts(ts_water_monthly[1:(nobs-nfor)],
#start=c(2002,02),
#frequency = 12)
#ts_water_monthly[(nobs-nfor+1):nobs]
#head(ts_testing2)
par(mfrow=c(1,1))
#SARIMA
sarima_fit <- auto.arima(ts_training2)
checkresiduals(sarima_fit)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,2)
## Q* = 20.844, df = 21, p-value = 0.4685
##
## Model df: 3. Total lags used: 24
sarima_for <- forecast(sarima_fit,h=nfor)
plot(sarima_for)

sarima_score <- accuracy(sarima_for$mean, ts_testing2)
ETS_fit <- stlf(ts_training2,h=nfor)
ETS_score <- accuracy(ETS_fit$mean, ts_testing2)
autoplot(ts_water_monthly)+
autolayer(ETS_fit, series="STL +ETS", PI=FALSE)+
autolayer(sarima_for, series = "SARIMA", PI=FALSE)

SS_ll <- StructTS(ts_training2, type="level")
SS_llt <- StructTS(ts_training2, type="trend")
SS_bsm <- StructTS(ts_training2, type="BSM")
SS_ll_for <- forecast(SS_ll,h=nfor)
plot(SS_ll_for)

SS_ll_score <- accuracy(SS_ll_for$mean,ts_testing2)
SS_llt_for <- forecast(SS_llt,h=nfor)
plot(SS_llt_for)

SS_llt_score <- accuracy(SS_llt_for$mean,ts_testing2)
SS_bsm_for <- forecast(SS_bsm,h=nfor)
plot(SS_bsm_for)

SS_bsm_score <- accuracy(SS_bsm_for$mean,ts_testing2)
#ETS is better if looking at MAPE
#ARIMA +Fourier
ARIMA_Four_fit_1<-auto.arima(ts_training2,
seasonal=FALSE,
xreg=fourier(ts_training2,K=c(2)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
xreg=fourier(ts_training2,K=c(2),h=nfor),
h=nfor)
#K=c(4)
ARIMA_Four_fit_2<-auto.arima(ts_training2,
seasonal=FALSE,
xreg=fourier(ts_training2,K=c(4)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_2<-forecast(ARIMA_Four_fit_2,
xreg=fourier(ts_training2,K=c(4),h=nfor),
h=nfor)
#K=c(6)
ARIMA_Four_fit_3<-auto.arima(ts_training2,
seasonal=FALSE,
xreg=fourier(ts_training2,K=c(6)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_3<-forecast(ARIMA_Four_fit_3,
xreg=fourier(ts_training2,K=c(6),h=nfor),
h=nfor)
scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,ts_testing2)
scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,ts_testing2)
scores_ARIMA_Four_forecast_3<-accuracy(ARIMA_Four_forecast_3$mean,ts_testing2)
#temp
#create time series object
#ts_temp_training<-ts(df_temp_preci[,2], frequency=12,
#start=c(2010,01), end=c(2020,09))
#ts_temp_testing<-ts(df_temp_preci[,2], frequency=12,
#start=c(2020,10), end=c(2021,10))
ts_temp_monthly<-ts(df_temp_preci2[,2], frequency=12,start =c(2002,02), end=c(2021,10))
ts_preci_monthly<-ts(df_temp_preci2[,3], frequency=12,start =c(2002,02), end=c(2021,10))
#preci
#create time series object
#ts_preci_training<-ts(df_temp_preci[,3], frequency=12,
#start=c(2010,01), end=c(2020,09))
#ts_preci_testing<-ts(df_temp_preci[,3], frequency=12,
#start=c(2020,10), end=c(2021,10))
#ts_temp<-ts(df_temp_preci[,2], frequency=12,start=c(2010,01), end=c(2021,10))
#ts_preci<-ts(df_temp_preci[,3], frequency=12,start=c(2010,01), end=c(2021,10))
autoplot(ts_temp_monthly)

autoplot(ts_preci_monthly)

ts_temp_training2 <- subset(ts_temp_monthly, end=length(ts_temp_monthly)-nfor)
ts_temp_testing2 <- subset(ts_temp_monthly, start = length(ts_temp_monthly)-nfor+1)
ts_preci_training2 <- subset(ts_preci_monthly, end=length(ts_preci_monthly)-nfor)
ts_preci_testing2 <- subset(ts_preci_monthly, start = length(ts_preci_monthly)-nfor+1)
#correlation test
cor_water_temp<-cor.test(ts_water_monthly,ts_temp_monthly,method = "spearman",exact = FALSE)
cor_water_preci<-cor.test(ts_water_monthly,ts_preci_monthly,method = "spearman",exact = FALSE)
#create a summary table
summary_table_cor<-data.frame(
Variable = c("Temperature", "Precipitation"),
Correlation_Coefficient = round(c(cor_water_temp$estimate,
cor_water_preci$estimate), 5),
P_Value = round(c(cor_water_temp$p.value,
cor_water_preci$p.value), 5)
)
print(summary_table_cor)
## Variable Correlation_Coefficient P_Value
## 1 Temperature -0.00128 0.98438
## 2 Precipitation 0.06314 0.33315
#decide not to use the exogenous variable
#K=c(2)
#fit ARIMA with deseason time series
xreg1 <- as.matrix(data.frame(fourier(ts_training2, K=c(2)),"preci"=ts_preci_training2))
ARIMA_Four_fit_XREG_1<-auto.arima(ts_training2,
seasonal=FALSE,
#combine fourier and exogenous variable time series
xreg=xreg1)
#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_1<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg1_for<-as.matrix(data.frame(fourier(ts_training2, K=c(2), h=12), "preci"=preci_forecast_1$mean))
#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_1<-forecast(ARIMA_Four_fit_XREG_1,
xreg=xreg1_for,
h=12)
#K=c(4)
xreg2 <- as.matrix(data.frame(fourier(ts_training2, K=c(4)),"preci"=ts_preci_training2))
ARIMA_Four_fit_XREG_2<-auto.arima(ts_training2,
seasonal=FALSE,
#combine fourier and exogenous variable time series
xreg=xreg2)
#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_2<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg2_for<-as.matrix(data.frame(fourier(ts_training2, K=c(4), h=12), "preci"=preci_forecast_2$mean))
#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_2<-forecast(ARIMA_Four_fit_XREG_2,
xreg=xreg2_for,
h=12)
#K=c(6)
xreg3 <- as.matrix(data.frame(fourier(ts_training2, K=c(6)),"preci"=ts_preci_training2))
ARIMA_Four_fit_XREG_3<-auto.arima(ts_training2,
seasonal=FALSE,
#combine fourier and exogenous variable time series
xreg=xreg3)
#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_3<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg3_for<-as.matrix(data.frame(fourier(ts_training2, K=c(6), h=12), "preci"=preci_forecast_3$mean))
#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_3<-forecast(ARIMA_Four_fit_XREG_3,
xreg=xreg3_for,
h=12)
ARIMA_Four_XREG_1_score<-accuracy(ARIMA_Four_forecast_XREG_1$mean,ts_testing2)
ARIMA_Four_XREG_2_score<-accuracy(ARIMA_Four_forecast_XREG_2$mean,ts_testing2)
ARIMA_Four_XREG_3_score<-accuracy(ARIMA_Four_forecast_XREG_3$mean,ts_testing2)
#p=1, P=0 because seasonal=FALSE
#K=c(2)
NN_fit_1<-nnetar(ts_training2,
p=1,
P=0,
xreg=fourier(ts_training2,K=c(2)))
NN_forecast_1<-forecast(NN_fit_1,
xreg=fourier(ts_training2,K=c(2),h=12),
h=12)
#K=c(4)
NN_fit_2<-nnetar(ts_training2,
p=1,
P=0,
xreg=fourier(ts_training2,K=c(4)))
NN_forecast_2<-forecast(NN_fit_2,
xreg=fourier(ts_training2,K=c(4),h=12),
h=12)
#K=c(6)
NN_fit_3<-nnetar(ts_training2,
p=1,
P=0,
xreg=fourier(ts_training2,K=c(6)))
NN_forecast_3<-forecast(NN_fit_3,
xreg=fourier(ts_training2,K=c(6),h=12),
h=12)
NN1_score <- accuracy(NN_forecast_1$mean,ts_testing2)
NN2_score <- accuracy(NN_forecast_2$mean,ts_testing2)
NN3_score <- accuracy(NN_forecast_3$mean,ts_testing2)
#Model1:ETS
#scores_ETS<-accuracy(ETS_fit$mean,ts_testing)
#Model2: ARIMA + Fourier, K=c(2)
#scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,ts_testing)
#Model3: ARIMA + Fourier, K=c(4)
#scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,ts_testing)
#Model4: ARIMA + Fourier, K=c(6)
#scores_ARIMA_Four_forecast_3<-accuracy(ARIMA_Four_forecast_3$mean,ts_testing)
#Model5: Neural network, K=c(2)
#scores_NN_1<-accuracy(NN_forecast_1$mean,ts_testing)
#Model6: Neural network, K=c(4)
#scores_NN_2<-accuracy(NN_forecast_2$mean,ts_testing)
#Model7: Neural network, K=c(6)
#scores_NN_3<-accuracy(NN_forecast_3$mean,ts_testing)
#Model8: ARIMA + Fourier + XREG, K=c(2)
#scores_ARIMA_Four_XREG_1<-accuracy(ARIMA_Four_forecast_XREG_1$mean,ts_testing)
#Model8: ARIMA + Fourier + XREG, K=c(4)
#scores_ARIMA_Four_XREG_2<-accuracy(ARIMA_Four_forecast_XREG_2$mean,ts_testing)
#Model9: ARIMA + Fourier + XREG, K=c(6)
#scores_ARIMA_Four_XREG_3<-accuracy(ARIMA_Four_forecast_XREG_3$mean,ts_testing)
#create a data frame to store the scores
scores<-as.data.frame(rbind(sarima_score,
ETS_score,
SS_ll_score,
SS_llt_score,
SS_bsm_score,
scores_ARIMA_Four_forecast_1,
scores_ARIMA_Four_forecast_2,
scores_ARIMA_Four_forecast_3,
NN1_score,
NN2_score,
NN3_score,
ARIMA_Four_XREG_1_score,
ARIMA_Four_XREG_2_score,
ARIMA_Four_XREG_3_score))
#scores_NN_all<-as.data.frame(rbind(
#scores_NN_1,
#scores_NN_2,
#scores_NN_3))
row.names(scores)<-c("SARIMA",
"ETS",
"SS LL",
"SS LLT",
"SS BSM",
"ARIMA Fourier K=c(2)",
"ARIMA Fourier K=c(4)",
"ARIMA Fourier K=c(6)",
"NN, K=c(2)",
"NN, K=c(4)",
"NN, K=c(6)",
"ARIMA Fourier K=c(2), Preci",
"ARIMA Fourier K=c(4), Preci",
"ARIMA Fourier K=c(6), Preci")
#row.names(scores_NN_all)<-c(
#"NN, K=c(2)",
#"NN, K=c(4)",
#"NN, K=c(6)")
#create a comparable table
kbl(scores,
caption = "Forecast Accuracy for Groundwater Level",
digits = array(5,ncol(scores))) %>%
kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")
Forecast Accuracy for Groundwater Level
|
|
ME
|
RMSE
|
MAE
|
MPE
|
MAPE
|
ACF1
|
Theil’s U
|
|
SARIMA
|
-37.75665
|
44.82988
|
40.73087
|
-17.71579
|
18.76675
|
-0.29561
|
1.42512
|
|
ETS
|
-41.43256
|
48.61780
|
43.34761
|
-19.40871
|
20.08541
|
-0.27734
|
1.54607
|
|
SS LL
|
-44.42955
|
51.01336
|
45.82536
|
-20.70269
|
21.19591
|
-0.27103
|
1.62979
|
|
SS LLT
|
-49.08918
|
55.45828
|
49.17074
|
-22.78976
|
22.81858
|
-0.25569
|
1.77316
|
|
SS BSM
|
-46.07920
|
52.56657
|
46.92513
|
-21.44167
|
21.74058
|
-0.27533
|
1.67848
|
|
ARIMA Fourier K=c(2)
|
-37.24543
|
44.27194
|
40.05454
|
-17.48731
|
18.47993
|
-0.31483
|
1.40568
|
|
ARIMA Fourier K=c(4)
|
-37.37725
|
44.44457
|
40.05086
|
-17.54926
|
18.49570
|
-0.30373
|
1.40961
|
|
ARIMA Fourier K=c(6)
|
-38.27517
|
45.06588
|
40.60453
|
-17.93086
|
18.75396
|
-0.29797
|
1.43071
|
|
NN, K=c(2)
|
-41.44304
|
47.88450
|
43.45199
|
-19.31467
|
20.02454
|
-0.28125
|
1.52564
|
|
NN, K=c(4)
|
-37.07205
|
43.90341
|
40.93468
|
-17.25721
|
18.62450
|
-0.14014
|
1.41739
|
|
NN, K=c(6)
|
-36.98488
|
43.71657
|
40.37459
|
-17.27148
|
18.47092
|
-0.25300
|
1.40162
|
|
ARIMA Fourier K=c(2), Preci
|
-37.17543
|
44.22498
|
39.98923
|
-17.45795
|
18.45223
|
-0.31427
|
1.40402
|
|
ARIMA Fourier K=c(4), Preci
|
-37.33546
|
44.41671
|
40.02878
|
-17.53163
|
18.48517
|
-0.30409
|
1.40867
|
|
ARIMA Fourier K=c(6), Preci
|
-38.31966
|
45.10509
|
40.63914
|
-17.95084
|
18.77045
|
-0.29871
|
1.43185
|
#kbl(scores_NN_all,
#caption = "Forecast Accuracy for Electricity Demand",
#digits = array(5,ncol(scores_NN_all))) %>%
#kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")
#RMSE and MAPE
#NN, K=c(2) has the best performance
#forecast period
forecast_start<-start(ARIMA_Four_forecast_1$mean)
forecast_end<-end(ARIMA_Four_forecast_1$mean)
ts_forecast<-window(ts_water_monthly, start=forecast_start, end=forecast_end)
#plot forecasting result
autoplot(ts_testing2)

autoplot(ts_testing2)+
autolayer(NN_forecast_3, series="Neural Network c(6)", PI=FALSE)+
autolayer(sarima_for, series = "SARIMA",PI=FALSE)+
autolayer(ARIMA_Four_forecast_1, series = "ARIMA + Fourier c(2)",PI=FALSE)

#NN c(6) is the best model
NN_all<-nnetar(ts_water_monthly,
p=1,
P=0,
xreg=fourier(ts_water_monthly,K=c(6)))
NN_for_all<-forecast(NN_all,
xreg=fourier(ts_water_monthly,K=c(6),h=36),
h=36)
resi_NN_all<-checkresiduals(NN_all)

##
## Ljung-Box test
##
## data: Residuals from NNAR(1,6)
## Q* = 29.995, df = 24, p-value = 0.1849
##
## Model df: 0. Total lags used: 24
autoplot(ts_water_monthly, series = "Observed Data")+
autolayer(NN_for_all, series = "NN Forecast")+
ylab("Groundwater Levels (feet)")

resi_ETS<-checkresiduals(ETS_fit)

##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,N,N)
## Q* = 88.949, df = 24, p-value = 2.151e-09
##
## Model df: 0. Total lags used: 24
resi_sarima<-checkresiduals(sarima_for)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,2)
## Q* = 20.844, df = 21, p-value = 0.4685
##
## Model df: 3. Total lags used: 24
resi_SS_ll<-checkresiduals(SS_ll_for)

##
## Ljung-Box test
##
## data: Residuals from Local level structural model
## Q* = 57.431, df = 24, p-value = 0.0001459
##
## Model df: 0. Total lags used: 24
resi_ARIMA_Four_1<-checkresiduals(ARIMA_Four_forecast_1)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 20.827, df = 21, p-value = 0.4696
##
## Model df: 3. Total lags used: 24
resi_ARIMA_Four_2<-checkresiduals(ARIMA_Four_forecast_2)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 22.015, df = 21, p-value = 0.3987
##
## Model df: 3. Total lags used: 24
resi_ARIMA_Four_3<-checkresiduals(ARIMA_Four_forecast_3)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 21.316, df = 21, p-value = 0.4398
##
## Model df: 3. Total lags used: 24
resi_ARIMA_Four_XREG_1<-checkresiduals(ARIMA_Four_forecast_XREG_1)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 20.891, df = 21, p-value = 0.4656
##
## Model df: 3. Total lags used: 24
resi_ARIMA_Four_XREG_2<-checkresiduals(ARIMA_Four_forecast_XREG_2)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 22.112, df = 21, p-value = 0.3931
##
## Model df: 3. Total lags used: 24
resi_ARIMA_Four_XREG_3<-checkresiduals(ARIMA_Four_forecast_XREG_3)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 21.301, df = 21, p-value = 0.4407
##
## Model df: 3. Total lags used: 24
resi_NN_forecast_1<-checkresiduals(NN_forecast_1)

##
## Ljung-Box test
##
## data: Residuals from NNAR(1,3)
## Q* = 51.949, df = 24, p-value = 0.0007945
##
## Model df: 0. Total lags used: 24
resi_NN_forecast_2<-checkresiduals(NN_forecast_2)

##
## Ljung-Box test
##
## data: Residuals from NNAR(1,5)
## Q* = 43.333, df = 24, p-value = 0.009111
##
## Model df: 0. Total lags used: 24
resi_NN_forecast_3<-checkresiduals(NN_forecast_3)

##
## Ljung-Box test
##
## data: Residuals from NNAR(1,6)
## Q* = 43.068, df = 24, p-value = 0.009772
##
## Model df: 0. Total lags used: 24